Source: plugin.py
import json
import logging
from typing import TYPE_CHECKING, Any, List, Optional, Union

from ...app.models import App
from ...core import EventDeliveryStatus
from ...core.models import EventDelivery
from ...core.notify_events import NotifyEventType
from ...core.utils.json_serializer import CustomJsonEncoder
from ...payment import PaymentError, TransactionKind
from ...webhook.event_types import WebhookEventAsyncType, WebhookEventSyncType
from ...webhook.payloads import (
    generate_checkout_payload,
    generate_collection_payload,
    generate_customer_payload,
    generate_excluded_shipping_methods_for_checkout_payload,
    generate_excluded_shipping_methods_for_order_payload,
    generate_fulfillment_payload,
    generate_invoice_payload,
    generate_list_gateways_payload,
    generate_meta,
    generate_order_payload,
    generate_page_payload,
    generate_payment_payload,
    generate_product_deleted_payload,
    generate_product_payload,
    generate_product_variant_payload,
    generate_product_variant_with_stock_payload,
    generate_requestor,
    generate_sale_payload,
    generate_translation_payload,
)
from ..base_plugin import BasePlugin, ExcludedShippingMethod
from .const import CACHE_EXCLUDED_SHIPPING_KEY
from .shipping import get_excluded_shipping_data, parse_list_shipping_methods_response
from .tasks import (
    _get_webhooks_for_event,
    send_webhook_request_async,
    trigger_webhook_sync,
    trigger_webhooks_async,
)
from .utils import (
    delivery_update,
    from_payment_app_id,
    parse_list_payment_gateways_response,
    parse_payment_action_response,
)

if TYPE_CHECKING:
    from ...account.models import User
    from ...checkout.models import Checkout
    from ...discount.models import Sale
    from ...graphql.discount.mutations import NodeCatalogueInfo
    from ...invoice.models import Invoice
    from ...order.models import Fulfillment, Order
    from ...page.models import Page
    from ...payment.interface import GatewayResponse, PaymentData, PaymentGateway
    from ...product.models import Collection, Product, ProductVariant
    from ...shipping.interface import ShippingMethodData
    from ...translation.models import Translation
    from ...warehouse.models import Stock

logger = logging.getLogger(__name__)


class WebhookPlugin(BasePlugin):
    PLUGIN_ID = "mirumee.webhooks"
    PLUGIN_NAME = "Webhooks"
    DEFAULT_ACTIVE = True
    CONFIGURATION_PER_CHANNEL = False

    @classmethod
    def check_plugin_id(cls, plugin_id: str) -> bool:
        is_webhook_plugin = super().check_plugin_id(plugin_id)
        if not is_webhook_plugin:
            payment_app_data = from_payment_app_id(plugin_id)
            return payment_app_data is not None
        return is_webhook_plugin

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.active = True

    def order_created(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.ORDER_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def order_confirmed(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.ORDER_CONFIRMED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def order_fully_paid(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.ORDER_FULLY_PAID
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def order_updated(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.ORDER_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def sale_created(
        self, sale: "Sale", current_catalogue: "NodeCatalogueInfo", previous_value: Any
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.SALE_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            sale_data = generate_sale_payload(
                sale,
                previous_catalogue=None,
                current_catalogue=current_catalogue,
                requestor=self.requestor,
            )
            trigger_webhooks_async(sale_data, event_type, webhooks)

    def sale_updated(
        self,
        sale: "Sale",
        previous_catalogue: "NodeCatalogueInfo",
        current_catalogue: "NodeCatalogueInfo",
        previous_value: Any,
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.SALE_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            sale_data = generate_sale_payload(
                sale, previous_catalogue, current_catalogue, self.requestor
            )
            trigger_webhooks_async(sale_data, event_type, webhooks)

    def sale_deleted(
        self, sale: "Sale", previous_catalogue: "NodeCatalogueInfo", previous_value: Any
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.SALE_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            sale_data = generate_sale_payload(
                sale, previous_catalogue=previous_catalogue, requestor=self.requestor
            )
            trigger_webhooks_async(sale_data, event_type, webhooks)

    def invoice_request(
        self,
        order: "Order",
        invoice: "Invoice",
        number: Optional[str],
        previous_value: Any,
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.INVOICE_REQUESTED
        if webhooks := _get_webhooks_for_event(event_type):
            invoice_data = generate_invoice_payload(invoice, self.requestor)
            trigger_webhooks_async(invoice_data, event_type, webhooks)

    def invoice_delete(self, invoice: "Invoice", previous_value: Any):
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.INVOICE_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            invoice_data = generate_invoice_payload(invoice, self.requestor)
            trigger_webhooks_async(invoice_data, event_type, webhooks)

    def invoice_sent(self, invoice: "Invoice", email: str, previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.INVOICE_SENT
        if webhooks := _get_webhooks_for_event(event_type):
            invoice_data = generate_invoice_payload(invoice, self.requestor)
            trigger_webhooks_async(invoice_data, event_type, webhooks)

    def order_cancelled(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.ORDER_CANCELLED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def order_fulfilled(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.ORDER_FULFILLED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def draft_order_created(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.DRAFT_ORDER_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def draft_order_updated(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.DRAFT_ORDER_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def draft_order_deleted(self, order: "Order", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.DRAFT_ORDER_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            order_data = generate_order_payload(order, self.requestor)
            trigger_webhooks_async(order_data, event_type, webhooks)

    def fulfillment_created(self, fulfillment: "Fulfillment", previous_value):
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.FULFILLMENT_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            fulfillment_data = generate_fulfillment_payload(fulfillment, self.requestor)
            trigger_webhooks_async(fulfillment_data, event_type, webhooks)

    def fulfillment_canceled(self, fulfillment: "Fulfillment", previous_value):
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.FULFILLMENT_CANCELED
        if webhooks := _get_webhooks_for_event(event_type):
            fulfillment_data = generate_fulfillment_payload(fulfillment, self.requestor)
            trigger_webhooks_async(fulfillment_data, event_type, webhooks)

    def customer_created(self, customer: "User", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.CUSTOMER_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            customer_data = generate_customer_payload(customer, self.requestor)
            trigger_webhooks_async(customer_data, event_type, webhooks)

    def customer_updated(self, customer: "User", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.CUSTOMER_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            customer_data = generate_customer_payload(customer, self.requestor)
            trigger_webhooks_async(customer_data, event_type, webhooks)

    def collection_created(self, collection: "Collection", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.COLLECTION_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            collection_data = generate_collection_payload(collection, self.requestor)
            trigger_webhooks_async(collection_data, event_type, webhooks)

    def collection_updated(self, collection: "Collection", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.COLLECTION_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            collection_data = generate_collection_payload(collection, self.requestor)
            trigger_webhooks_async(collection_data, event_type, webhooks)

    def collection_deleted(self, collection: "Collection", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.COLLECTION_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            collection_data = generate_collection_payload(collection, self.requestor)
            trigger_webhooks_async(collection_data, event_type, webhooks)

    def product_created(self, product: "Product", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            product_data = generate_product_payload(product, self.requestor)
            trigger_webhooks_async(product_data, event_type, webhooks)

    def product_updated(self, product: "Product", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            product_data = generate_product_payload(product, self.requestor)
            trigger_webhooks_async(product_data, event_type, webhooks)

    def product_deleted(
        self, product: "Product", variants: List[int], previous_value: Any
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            product_data = generate_product_deleted_payload(
                product, variants, self.requestor
            )
            trigger_webhooks_async(
                product_data,
                event_type,
                webhooks,
            )

    def product_variant_created(
        self, product_variant: "ProductVariant", previous_value: Any
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_VARIANT_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            product_variant_data = generate_product_variant_payload(
                [product_variant], self.requestor
            )
            trigger_webhooks_async(
                product_variant_data,
                event_type,
                webhooks,
            )

    def product_variant_updated(
        self, product_variant: "ProductVariant", previous_value: Any
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_VARIANT_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            product_variant_data = generate_product_variant_payload(
                [product_variant], self.requestor
            )
            trigger_webhooks_async(
                product_variant_data,
                event_type,
                webhooks,
            )

    def product_variant_deleted(
        self, product_variant: "ProductVariant", previous_value: Any
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_VARIANT_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            product_variant_data = generate_product_variant_payload(
                [product_variant], self.requestor
            )
            trigger_webhooks_async(
                product_variant_data,
                event_type,
                webhooks,
            )

    def product_variant_out_of_stock(self, stock: "Stock", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_VARIANT_OUT_OF_STOCK
        if webhooks := _get_webhooks_for_event(event_type):
            product_variant_data = generate_product_variant_with_stock_payload([stock])
            trigger_webhooks_async(product_variant_data, event_type, webhooks)

    def product_variant_back_in_stock(self, stock: "Stock", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PRODUCT_VARIANT_BACK_IN_STOCK
        if webhooks := _get_webhooks_for_event(event_type):
            product_variant_data = generate_product_variant_with_stock_payload(
                [stock], self.requestor
            )
            trigger_webhooks_async(product_variant_data, event_type, webhooks)

    def checkout_created(self, checkout: "Checkout", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.CHECKOUT_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            checkout_data = generate_checkout_payload(checkout, self.requestor)
            trigger_webhooks_async(checkout_data, event_type, webhooks)

    def checkout_updated(self, checkout: "Checkout", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.CHECKOUT_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            checkout_data = generate_checkout_payload(checkout, self.requestor)
            trigger_webhooks_async(checkout_data, event_type, webhooks)

    def notify(
        self, event: Union[NotifyEventType, str], payload: dict, previous_value
    ) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.NOTIFY_USER
        if webhooks := _get_webhooks_for_event(event_type):
            data = {
                "notify_event": event,
                "payload": payload,
                "meta": generate_meta(
                    requestor_data=generate_requestor(self.requestor)
                ),
            }
            if event not in NotifyEventType.CHOICES:
                logger.info(f"Webhook {event_type} triggered for {event} notify event.")
            trigger_webhooks_async(
                json.dumps(data, cls=CustomJsonEncoder), event_type, webhooks
            )

    def page_created(self, page: "Page", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PAGE_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            page_data = generate_page_payload(page, self.requestor)
            trigger_webhooks_async(page_data, event_type, webhooks)

    def page_updated(self, page: "Page", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PAGE_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            page_data = generate_page_payload(page, self.requestor)
            trigger_webhooks_async(page_data, event_type, webhooks)

    def page_deleted(self, page: "Page", previous_value: Any) -> Any:
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.PAGE_DELETED
        if webhooks := _get_webhooks_for_event(event_type):
            page_data = generate_page_payload(page, self.requestor)
            trigger_webhooks_async(page_data, event_type, webhooks)

    def translation_created(self, translation: "Translation", previous_value: Any):
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.TRANSLATION_CREATED
        if webhooks := _get_webhooks_for_event(event_type):
            translation_data = generate_translation_payload(translation, self.requestor)
            trigger_webhooks_async(translation_data, event_type, webhooks)

    def translation_updated(self, translation: "Translation", previous_value: Any):
        if not self.active:
            return previous_value
        event_type = WebhookEventAsyncType.TRANSLATION_UPDATED
        if webhooks := _get_webhooks_for_event(event_type):
            translation_data = generate_translation_payload(translation, self.requestor)
            trigger_webhooks_async(translation_data, event_type, webhooks)

    def event_delivery_retry(self, delivery: "EventDelivery", previous_value: Any):
        if not self.active:
            return previous_value
        delivery_update(delivery, status=EventDeliveryStatus.PENDING)
        send_webhook_request_async.delay(delivery.pk)

    def __run_payment_webhook(
        self,
        event_type: str,
        transaction_kind: str,
        payment_information: "PaymentData",
        previous_value,
        **kwargs
    ) -> "GatewayResponse":
        if not self.active:
            return previous_value
        app = None
        payment_app_data = from_payment_app_id(payment_information.gateway)
        if payment_app_data is not None:
            app = (
                App.objects.for_event_type(event_type)
                .filter(pk=payment_app_data.app_pk)
                .first()
            )
        if not app:
            logger.warning(
                "Payment webhook for event %r failed - no active app found: %r",
                event_type,
                payment_information.gateway,
            )
            raise PaymentError(
                f"Payment method {payment_information.gateway} is not available: "
                "app not found."
            )
        webhook_payload = generate_payment_payload(payment_information)
        response_data = trigger_webhook_sync(event_type, webhook_payload, app)
        if response_data is None:
            raise PaymentError(
                f"Payment method {payment_information.gateway} is not available: "
                "no response from the app."
            )
        return parse_payment_action_response(
            payment_information, response_data, transaction_kind
        )

    def token_is_required_as_payment_input(self, previous_value):
        return False

    def get_payment_gateways(
        self,
        currency: Optional[str],
        checkout: Optional["Checkout"],
        previous_value,
        **kwargs
    ) -> List["PaymentGateway"]:
        gateways = []
        apps = App.objects.for_event_type(
            WebhookEventSyncType.PAYMENT_LIST_GATEWAYS
        ).prefetch_related("webhooks")
        for app in apps:
            response_data = trigger_webhook_sync(
                event_type=WebhookEventSyncType.PAYMENT_LIST_GATEWAYS,
                data=generate_list_gateways_payload(currency, checkout),
                app=app,
            )
            if response_data:
                app_gateways = parse_list_payment_gateways_response(response_data, app)
                if currency:
                    app_gateways = [
                        gtw for gtw in app_gateways if currency in gtw.currencies
                    ]
                gateways.extend(app_gateways)
        return gateways

    def authorize_payment(
        self, payment_information: "PaymentData", previous_value, **kwargs
    ) -> "GatewayResponse":
        return self.__run_payment_webhook(
            WebhookEventSyncType.PAYMENT_AUTHORIZE,
            TransactionKind.AUTH,
            payment_information,
            previous_value,
            **kwargs,
        )

    def capture_payment(
        self, payment_information: "PaymentData", previous_value, **kwargs
    ) -> "GatewayResponse":
        return self.__run_payment_webhook(
            WebhookEventSyncType.PAYMENT_CAPTURE,
            TransactionKind.CAPTURE,
            payment_information,
            previous_value,
            **kwargs,
        )

    def refund_payment(
        self, payment_information: "PaymentData", previous_value, **kwargs
    ) -> "GatewayResponse":
        return self.__run_payment_webhook(
            WebhookEventSyncType.PAYMENT_REFUND,
            TransactionKind.REFUND,
            payment_information,
            previous_value,
            **kwargs,
        )

    def void_payment(
        self, payment_information: "PaymentData", previous_value, **kwargs
    ) -> "GatewayResponse":
        return self.__run_payment_webhook(
            WebhookEventSyncType.PAYMENT_VOID,
            TransactionKind.VOID,
            payment_information,
            previous_value,
            **kwargs,
        )

    def confirm_payment(
        self, payment_information: "PaymentData", previous_value, **kwargs
    ) -> "GatewayResponse":
        return self.__run_payment_webhook(
            WebhookEventSyncType.PAYMENT_CONFIRM,
            TransactionKind.CONFIRM,
            payment_information,
            previous_value,
            **kwargs,
        )

    def process_payment(
        self, payment_information: "PaymentData", previous_value, **kwargs
    ) -> "GatewayResponse":
        return self.__run_payment_webhook(
            WebhookEventSyncType.PAYMENT_PROCESS,
            TransactionKind.CAPTURE,
            payment_information,
            previous_value,
            **kwargs,
        )

    def get_shipping_methods_for_checkout(
        self, checkout: "Checkout", previous_value: Any
    ) -> List["ShippingMethodData"]:
        methods = []
        apps = App.objects.for_event_type(
            WebhookEventSyncType.SHIPPING_LIST_METHODS_FOR_CHECKOUT
        ).prefetch_related("webhooks")
        if apps:
            payload = generate_checkout_payload(checkout, self.requestor)
            for app in apps:
                response_data = trigger_webhook_sync(
                    event_type=WebhookEventSyncType.SHIPPING_LIST_METHODS_FOR_CHECKOUT,
                    data=payload,
                    app=app,
                )
                if response_data:
                    shipping_methods = parse_list_shipping_methods_response(
                        response_data, app
                    )
                    methods.extend(shipping_methods)
        return methods

    def excluded_shipping_methods_for_order(
        self,
        order: "Order",
        available_shipping_methods: List["ShippingMethodData"],
        previous_value: List[ExcludedShippingMethod],
    ) -> List[ExcludedShippingMethod]:
        generate_function = generate_excluded_shipping_methods_for_order_payload
        payload_fun = lambda: generate_function(  # noqa: E731
            order,
            available_shipping_methods,
        )
        cache_key = CACHE_EXCLUDED_SHIPPING_KEY + str(order.id)
        return get_excluded_shipping_data(
            event_type=WebhookEventSyncType.ORDER_FILTER_SHIPPING_METHODS,
            previous_value=previous_value,
            payload_fun=payload_fun,
            cache_key=cache_key,
        )

    def excluded_shipping_methods_for_checkout(
        self,
        checkout: "Checkout",
        available_shipping_methods: List["ShippingMethodData"],
        previous_value: List[ExcludedShippingMethod],
    ) -> List[ExcludedShippingMethod]:
        generate_function = generate_excluded_shipping_methods_for_checkout_payload
        payload_function = lambda: generate_function(  # noqa: E731
            checkout,
            available_shipping_methods,
        )
        cache_key = CACHE_EXCLUDED_SHIPPING_KEY + str(checkout.token)
        return get_excluded_shipping_data(
            event_type=WebhookEventSyncType.CHECKOUT_FILTER_SHIPPING_METHODS,
            previous_value=previous_value,
            payload_fun=payload_function,
            cache_key=cache_key,
        )

    def is_event_active(self, event: str, channel: Optional[str] = None):
        map_event = {"invoice_request": WebhookEventAsyncType.INVOICE_REQUESTED}
        webhooks = _get_webhooks_for_event(event_type=map_event[event])
...
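Every asynchronous handler in WebhookPlugin above follows the same guard-then-dispatch shape: return the pipeline value unchanged when the plugin is inactive, look up subscribed webhooks with the walrus operator so the (potentially expensive) payload is only generated when someone is listening, then hand the serialized payload to the async trigger. Below is a minimal, self-contained sketch of that pattern; TinyPlugin, SUBSCRIPTIONS, get_webhooks_for_event, and deliver are illustrative stand-ins, not the Saleor helpers imported above.

# Minimal sketch of the guard-then-dispatch pattern used by every async
# handler in WebhookPlugin. All names here are illustrative stand-ins.
import json
from typing import Any, Callable, Dict, List

SUBSCRIPTIONS: Dict[str, List[Callable[[str], None]]] = {}

def get_webhooks_for_event(event_type: str) -> List[Callable[[str], None]]:
    # Stand-in for _get_webhooks_for_event: returns active subscribers only.
    return SUBSCRIPTIONS.get(event_type, [])

class TinyPlugin:
    active = True

    def order_created(self, order: dict, previous_value: Any) -> Any:
        if not self.active:
            # Inactive plugins pass the pipeline value through untouched.
            return previous_value
        event_type = "order_created"
        # The walrus operator skips payload generation entirely when no
        # webhook subscribes to the event - payloads can be expensive.
        if webhooks := get_webhooks_for_event(event_type):
            payload = json.dumps({"event": event_type, "order": order})
            for deliver in webhooks:
                deliver(payload)  # stand-in for trigger_webhooks_async

SUBSCRIPTIONS["order_created"] = [lambda body: print("delivered:", body)]
TinyPlugin().order_created({"id": 1}, previous_value=None)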
Source: data_visualisation.py
import numpy as np
import scipy as sc
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier as GBC
from pandas import read_csv, DataFrame
import os, sys
import pickle
import data_preparation as dp
import PyROOTPlots as PyRPl
from ROOT import *
import BDT_file_handler as BDT_fh
import script_utils as script_utils
import matplotlib.colors as mcolors
import xgboost as xgb
from sklearn import preprocessing
from sklearn.decomposition import PCA, KernelPCA, RandomizedPCA
from sklearn.lda import LDA


def make_colormap(seq):
    """Return a LinearSegmentedColormap

    seq: a sequence of floats and RGB-tuples. The floats should be increasing
    and in the interval (0,1).
    """
    seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3]
    cdict = {"red": [], "green": [], "blue": []}
    for i, item in enumerate(seq):
        if isinstance(item, float):
            r1, g1, b1 = seq[i - 1]
            r2, g2, b2 = seq[i + 1]
            cdict["red"].append([item, r1, r2])
            cdict["green"].append([item, g1, g2])
            cdict["blue"].append([item, b1, b2])
    return mcolors.LinearSegmentedColormap("CustomMap", cdict)


def plot_results(d_test, d_eval, d_event_dir, classifier_type, WIMP_mass, bolo_name, analysis_type, exposure, bin_X, min_X, max_X):
    """
    Detail:
        Plot results

    Args:
        d_test (dict) = dict with test data
        d_eval (dict) = dict with eval data
        d_event_dir (dict) = dict to get the proper directory of each event class
        classifier_type (str) = type of classifier
        WIMP_mass (str) = WIMP mass
        bolo_name (str) = bolometer name
        analysis_type (str) = type of analysis (which box cut)
        exposure (float) = exposure in days
        bin_X, min_X, max_X (int float float) = TH1F parameters

    Returns:
        void

    Raises:
        void
    """
    # Get scaling dict for data visualisation
    d_scaling = BDT_fh.open_MVA_scaling_file(bolo_name, analysis_type, "")

    # Load classifier
    pickle_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/")
    clf_file = open(pickle_dir + classifier_type + "_mass_" + str(WIMP_mass) + ".pkl", 'rb')
    clf = pickle.load(clf_file)
    clf_file.close()

    # Get predictions on test sample
    d_pred = {}
    d_hist = {}
    d_color = {"S1Pb": kOrange-8, "S2Pb": kOrange-9, "S1Beta": kGreen+2, "S2Beta": kGreen-3,
               "S1Gamma": kBlue-7, "S2Gamma": kBlue, "FidGamma": kAzure+10, "heatonly": kRed,
               "WIMP_mass_" + WIMP_mass: kGray, "neutron": kMagenta}
    for event_type in d_test.keys():
        d_pred[event_type] = clf.predict_proba(d_test[event_type].iloc[:, :6].values)
        d_hist[event_type] = TH1F("h" + event_type, "h" + event_type, bin_X, min_X, max_X)
        PyRPl.fill_TH1(d_hist[event_type], d_pred[event_type][:, 1])
        PyRPl.process_TH1(d_hist[event_type], use_fill_bool=True, color=d_color[event_type])
        if "WIMP" not in event_type:
            d_hist[event_type].Scale(float(d_scaling["prop_" + event_type])*float(d_scaling["exp_per_day"])*exposure/float(d_hist[event_type].Integral()))

    # Get predictions on data
    hdata = TH1F("hdata", "hdata", bin_X, min_X, max_X)
    PyRPl.fill_TH1(hdata, clf.predict_proba(d_eval["realdata"].iloc[:, :6].values)[:, 1])
    d_hist["WIMP_mass_" + WIMP_mass].Scale(hdata.Integral()/d_hist["WIMP_mass_" + WIMP_mass].Integral())

    d_hist["S1Pb"].Add(d_hist["S2Pb"])
    d_hist["S1Beta"].Add(d_hist["S2Beta"])
    d_hist["FidGamma"].Add(d_hist["S1Gamma"])
    d_hist["FidGamma"].Add(d_hist["S2Gamma"])
    list_hist = [d_hist["S1Pb"], d_hist["S1Beta"], d_hist["FidGamma"], d_hist["heatonly"], d_hist["WIMP_mass_" + WIMP_mass]]
    hs = THStack("hs", "hs")
    for hist in list_hist:
        hs.Add(hist)

    cc = TCanvas("cc", "cc")
    h1 = TH1F("h1", "h1", bin_X, min_X, max_X)
    PyRPl.process_TH1(h1, X_title="BDT output", min_Y=1E-1, max_Y=20000)

    gPad.SetLogy()
    h1.Draw()
    hs.Draw("same")
    hdata.Draw("sameE1")

    leg = TLegend(0.14, 0.50, 0.33, 0.87)
    leg.AddEntry(d_hist["S1Pb"].GetName(), "Lead", "f")
    leg.AddEntry(d_hist["S1Beta"].GetName(), "Beta", "f")
    leg.AddEntry(d_hist["FidGamma"].GetName(), "Gamma", "f")
    leg.AddEntry(d_hist["heatonly"].GetName(), "Heat-only", "f")
    leg.AddEntry(d_hist["WIMP_mass_" + WIMP_mass].GetName(), "WIMP " + WIMP_mass + " GeV", "f")
    leg.SetFillColor(kWhite)
    leg.SetBorderSize(0)
    leg.Draw("same")

    print d_hist["WIMP_mass_" + WIMP_mass].Integral(d_hist["WIMP_mass_" + WIMP_mass].FindBin(0.9), 1)/d_hist["WIMP_mass_" + WIMP_mass].Integral()
    raw_input()

    for key in d_hist.keys(): del d_hist[key]
    del d_hist
    del h1


def plot_control(d_test, d_eval, d_event_dir, classifier_type, WIMP_mass, bolo_name, analysis_type, exposure, bin_X, min_X, max_X):
    """
    Detail:
        Plot results

    Args:
        d_test (dict) = dict with test data
        d_eval (dict) = dict with eval data
        d_event_dir (dict) = dict to get the proper directory of each event class
        classifier_type (str) = type of classifier
        WIMP_mass (str) = WIMP mass
        bolo_name (str) = bolometer name
        analysis_type (str) = type of analysis (which box cut)
        exposure (float) = exposure in days
        bin_X, min_X, max_X (int float float) = TH1F parameters

    Returns:
        void

    Raises:
        void
    """
    # Load classifier
    pickle_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/")
    clf_file = open(pickle_dir + classifier_type + "_mass_" + str(WIMP_mass) + ".pkl", 'rb')
    clf = pickle.load(clf_file)
    clf_file.close()

    # Get predictions
    d_pred = {}
    for event_type in d_test.keys():
        d_pred[event_type] = clf.predict_proba(d_test[event_type].iloc[:, :6].values)
    d_pred["realdata"] = clf.predict_proba(d_eval["realdata"].iloc[:, :6].values)

    # Get color map
    c = mcolors.ColorConverter().to_rgb
    rvb = make_colormap([c("red"), c("red"), 0.33, c("red"), c("green"), 0.86, c("green")])

    # Build list of test data of interest
    list_event_type = ["FidGamma", "WIMP_mass_" + WIMP_mass]

    # Output dir
    fig_dir = script_utils.create_directory("./Figures/" + bolo_name + "/" + analysis_type + "/")

    # Compute new columns for data frames
    d_eval["realdata"]["EC"] = 0.5*(d_eval["realdata"]["EC1"] + d_eval["realdata"]["EC2"])
    d_eval["realdata"]["EFID"] = 0.5*(d_eval["realdata"]["EIB"] + d_eval["realdata"]["EID"])
    d_eval["realdata"]["MVA"] = d_pred["realdata"][:, 1]
    for event_type in list_event_type:
        d_test[event_type]["EC"] = 0.5*(d_test[event_type]["EC1"] + d_test[event_type]["EC2"])
        d_test[event_type]["EFID"] = 0.5*(d_test[event_type]["EIB"] + d_test[event_type]["EID"])
        d_test[event_type]["MVA"] = d_pred[event_type][:, 1]

    # Plots for real data
    plt.figure()
    plt.scatter(d_test["WIMP_mass_" + WIMP_mass]["EC"].values, d_test["WIMP_mass_" + WIMP_mass]["EFID"].values, color="0.6", s=1)
    plt.scatter(d_eval["realdata"]["EC"], d_eval["realdata"]["EFID"], s=20, c=d_eval["realdata"]["MVA"].values, cmap=rvb, vmin=-0.5, vmax=1)
    # d_test["WIMP_mass_" + WIMP_mass].plot(kind='scatter', x='EC', y='EFID', color="0.6", s=1)
    # ax = d_eval["realdata"].plot(kind='scatter', x='EC', y='EFID', c=d_eval["realdata"]["MVA"], cmap=rvb, vmin=0, vmax=1)
    cbar = plt.colorbar()
    cbar.set_label("MVA output", labelpad=15, fontsize=18)
    plt.xlabel("Heat (keV)", fontsize=20)
    plt.ylabel("Fiducial Ionisation (keV)", fontsize=20)
    plt.ylim([0, 5])
    plt.xlim([0.5, 5])
    plt.grid(True)
    plt.savefig(fig_dir + bolo_name + "_real_data_WIMP_mass_" + WIMP_mass + "_" + classifier_type + ".png")
    plt.close("all")

    for event_type in list_event_type:
        plt.figure()
        plt.scatter(d_test["WIMP_mass_" + WIMP_mass]["EC"].values, d_test["WIMP_mass_" + WIMP_mass]["EFID"].values, color="0.6", s=1)
        plt.scatter(d_test[event_type]["EC"], d_test[event_type]["EFID"], s=20, c=d_test[event_type]["MVA"].values, cmap=rvb, vmin=-0.5, vmax=1)
        # d_test["WIMP_mass_" + WIMP_mass].plot(kind='scatter', x='EC', y='EFID', color="0.6", s=1)
        # d_test[event_type].plot(kind='scatter', x='EC', y='EFID', c=d_test[event_type]["MVA"], cmap=rvb, vmin=0, vmax=1)
        cbar = plt.colorbar()
        cbar.set_label("BDT output", labelpad=15, fontsize=18)
        plt.xlabel("Heat (keV)", fontsize=20)
        plt.ylabel("Fiducial Ionisation (keV)", fontsize=20)
        plt.ylim([0, 5])
        plt.xlim([0.5, 5])
        plt.grid(True)
        plt.savefig(fig_dir + bolo_name + "_" + event_type + "_WIMP_mass_" + WIMP_mass + "_" + classifier_type + ".png")
        plt.close("all")


def plot_results_xgboost(d_test, d_eval, d_event_dir, WIMP_mass, bolo_name, analysis_type, MVA_tag, exposure, bin_X, min_X, max_X, **kwargs):
    """
    Detail:
        Plot results

    Args:
        d_test (dict) = dict with test data
        d_eval (dict) = dict with eval data
        d_event_dir (dict) = dict to get the proper directory of each event class
        WIMP_mass (str) = WIMP mass
        bolo_name (str) = bolometer name
        analysis_type (str) = type of analysis (which box cut)
        MVA_tag (str) = indicates which scaling file to use
        exposure (float) = exposure in days
        bin_X, min_X, max_X (int float float) = TH1F parameters

    Returns:
        void

    Raises:
        void
    """
    try:
        kwargs["weight_dir"]
    except KeyError:
        sys.exit()

    # Get scaling dict to set the weights
    d_scaling = BDT_fh.open_MVA_scaling_file(bolo_name, analysis_type, MVA_tag)

    # #Load PCA
    # pickle_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/")
    # pca_file = open(pickle_dir + "pca_classifier_mass_" + str(WIMP_mass) + ".pkl", 'rb')
    # pca = pickle.load(pca_file)
    # pca_file.close()

    key_heat = ""
    for key in d_test.keys():
        if "heat" in key:
            key_heat = key

    # Get classifier
    model_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/" + kwargs["weight_dir"] + "/")
    modelfile = model_dir + "xgboost_classifier_mass_" + str(WIMP_mass) + ".model"
    if kwargs.has_key("classifier_name"):
        modelfile = model_dir + "xgboost_classifier_mass_" + str(WIMP_mass) + "_" + kwargs["classifier_name"] + ".model"
    bst = xgb.Booster({'nthread': 16}, model_file=modelfile)

    # Get predictions on test sample
    d_pred = {}
    d_hist = {}
    d_color = {"S1Pb": kOrange-8, "S2Pb": kOrange-9, "S1Beta": kGreen+2, "S2Beta": kGreen-3,
               "S1Gamma": kBlue-7, "S2Gamma": kBlue, "FidGamma": kAzure+10, key_heat: kRed,
               "WIMP_mass_" + WIMP_mass: kGray, "neutron": kMagenta}
    for event_type in d_test.keys():
        d_pred[event_type] = bst.predict(xgb.DMatrix(d_test[event_type].iloc[:, :-3].values))
        d_hist[event_type] = TH1F("h" + event_type + WIMP_mass, "h" + event_type + WIMP_mass, bin_X, min_X, max_X)
        PyRPl.fill_TH1(d_hist[event_type], d_pred[event_type])
        PyRPl.process_TH1(d_hist[event_type], use_fill_bool=True, color=d_color[event_type])
        if "WIMP" not in event_type:
            d_hist[event_type].Scale(float(d_scaling["prop_" + event_type])*float(d_scaling["exp_per_day"])*exposure/float(d_hist[event_type].Integral()))
            print "Event type:", event_type, "\tExpected #:", float(d_scaling["prop_" + event_type])*float(d_scaling["exp_per_day"])*exposure

    # Get predictions on data
    hdata = TH1F("hdata" + WIMP_mass, "hdata" + WIMP_mass, bin_X, min_X, max_X)
    PyRPl.fill_TH1(hdata, bst.predict(xgb.DMatrix(d_eval["realdata"].iloc[:, :].values)))
    d_hist["WIMP_mass_" + WIMP_mass].Scale(hdata.Integral()/d_hist["WIMP_mass_" + WIMP_mass].Integral())

    d_hist["S1Pb"].Add(d_hist["S2Pb"])
    d_hist["S1Beta"].Add(d_hist["S2Beta"])
    d_hist["FidGamma"].Add(d_hist["S1Gamma"])
    d_hist["FidGamma"].Add(d_hist["S2Gamma"])
    list_hist = [d_hist["S1Pb"], d_hist["S1Beta"], d_hist["FidGamma"], d_hist[key_heat], d_hist["WIMP_mass_" + WIMP_mass]]
    hs = THStack("hs" + WIMP_mass, "hs" + WIMP_mass)
    for hist in list_hist:
        hs.Add(hist)

    list_hist = [d_hist["S1Pb"], d_hist["S1Beta"], d_hist["FidGamma"], d_hist[key_heat], d_hist["WIMP_mass_" + WIMP_mass]]
    hsum_bckg = TH1F("hsum_bckg", "hsum_bckg", bin_X, min_X, max_X)
    for i in range(1, bin_X+1):
        hsum_bckg.SetBinContent(i, sum([h.GetBinContent(i) for h in list_hist[:-1]]))
    # print "Chi2: ", hdata.Chi2Test(hsum_bckg, "P")
    del hsum_bckg

    cc = TCanvas("cc", "cc")
    h1 = TH1F("h1" + WIMP_mass, "h1" + WIMP_mass, bin_X, min_X, max_X)
    PyRPl.process_TH1(h1, X_title="BDT output", min_Y=1E-1, max_Y=20000)

    gPad.SetLogy()
    h1.Draw()
    hs.Draw("same")
    hdata.Draw("sameE1")

    leg = TLegend(0.14, 0.50, 0.33, 0.87)
    leg.AddEntry(d_hist["S1Pb"].GetName(), "Lead", "f")
    leg.AddEntry(d_hist["S1Beta"].GetName(), "Beta", "f")
    leg.AddEntry(d_hist["FidGamma"].GetName(), "Gamma", "f")
    leg.AddEntry(d_hist[key_heat].GetName(), "Heat-only", "f")
    leg.AddEntry(d_hist["WIMP_mass_" + WIMP_mass].GetName(), "WIMP " + WIMP_mass + " GeV", "f")
    leg.SetFillColor(kWhite)
    leg.SetBorderSize(0)
    leg.Draw("same")

    raw_input()
    fig_dir = script_utils.create_directory("./Figures/" + bolo_name + "/" + analysis_type + "/" + kwargs["weight_dir"] + "/")
    cc.Print(fig_dir + bolo_name + "_BDT_mass_" + str(WIMP_mass) + ".eps")


def plot_control_xgboost(d_test, d_eval, d_event_dir, WIMP_mass, bolo_name, analysis_type, MVA_tag, exposure, bin_X, min_X, max_X, list_variables, **kwargs):
    """
    Detail:
        Plot results control

    Args:
        d_test (dict) = dict with test data
        d_eval (dict) = dict with eval data
        d_event_dir (dict) = dict to get the proper directory of each event class
        WIMP_mass (str) = WIMP mass
        bolo_name (str) = bolometer name
        analysis_type (str) = type of analysis (which box cut)
        MVA_tag (str) = indicates which scaling file to use
        exposure (float) = exposure in days
        bin_X, min_X, max_X (int float float) = TH1F parameters

    Returns:
        void

    Raises:
        void
    """
    try:
        kwargs["weight_dir"]
    except KeyError:
        sys.exit()

    # Get scaling dict to set the weights
    d_scaling = BDT_fh.open_MVA_scaling_file(bolo_name, analysis_type, MVA_tag)

    key_heat = ""
    for key in d_test.keys():
        if "heat" in key:
            key_heat = key

    ##################
    # Temporary to check effect of EC versus 0.5(EC1+EC2)
    # Effect small
    ####################
    # d_eval = {}
    # data_dir = script_utils.create_directory("/home/irfulx204/mnt/tmain/Desktop/BDT_Scikit/Eval_data/" + bolo_name + "/" + analysis_type + "/")
    # d_eval["realdata"] = pd.read_csv(data_dir + bolo_name + "_" + analysis_type + "_fond.csv", usecols=["EC1", "EC2", "EIA", "EIB", "EIC", "EID", "EC", "EFID", "HR"])
    # temp_eval_EC = d_eval["realdata"]["EC"]
    # d_eval["realdata"] = d_eval["realdata"][list_variables]

    # Get classifier
    model_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/" + kwargs["weight_dir"] + "/")
    modelfile = model_dir + "xgboost_classifier_mass_" + str(WIMP_mass) + ".model"
    if kwargs.has_key("classifier_name"):
        modelfile = model_dir + "xgboost_classifier_mass_" + str(WIMP_mass) + "_" + kwargs["classifier_name"] + ".model"
    bst = xgb.Booster({'nthread': 16}, model_file=modelfile)

    # Get predictions
    d_pred = {}
    for event_type in d_test.keys():
        d_pred[event_type] = bst.predict(xgb.DMatrix(d_test[event_type].iloc[:, :-3].values))
    d_pred["realdata"] = bst.predict(xgb.DMatrix(d_eval["realdata"].iloc[:, :].values))

    # Get color map
    c = mcolors.ColorConverter().to_rgb
    rvb = make_colormap([c("red"), c("red"), 0.33, c("red"), c("green"), 0.86, c("green")])

    # Build list of test data of interest
    list_event_type = ["FidGamma", "heatonly", "WIMP_mass_" + WIMP_mass]
    list_event_type = ["WIMP_mass_" + WIMP_mass]
    # list_event_type = ["heatonly"]

    # Output dir
    fig_dir = script_utils.create_directory("./Figures/" + bolo_name + "/" + analysis_type + "/" + kwargs["weight_dir"] + "/")

    # Compute new columns for data frames
    d_eval["realdata"]["EC"] = 0.5*(d_eval["realdata"]["EC1"] + d_eval["realdata"]["EC2"])  # temp_eval_EC
    if "EIB" in list(d_eval["realdata"].columns.values) and "EID" in list(d_eval["realdata"].columns.values):
        d_eval["realdata"]["EFID"] = 0.5*(d_eval["realdata"]["EIB"] + d_eval["realdata"]["EID"])
    d_eval["realdata"]["MVA"] = d_pred["realdata"]
    # plt.hist(d_eval["realdata"]["MVA"], bins=100)
    # plt.show()
    # raw_input()
    for event_type in list_event_type:
        d_test[event_type]["EC"] = 0.5*(d_test[event_type]["EC1"] + d_test[event_type]["EC2"])
        if "EIB" in list(d_test[event_type].columns.values) and "EID" in list(d_test[event_type].columns.values):
            d_test[event_type]["EFID"] = 0.5*(d_test[event_type]["EIB"] + d_test[event_type]["EID"])
        d_test[event_type]["MVA"] = d_pred[event_type]

    # l = np.where(d_eval["realdata"]["MVA"]>0)
    # d_eval["realdata"] = d_eval["realdata"].iloc[l]
    # l = np.where(np.logical_and(d_eval["realdata"]["EIA"]<1, d_eval["realdata"]["EIC"]<1))
    # d_eval["realdata"] = d_eval["realdata"].iloc[l]
    # l = np.where(d_test["WIMP_mass_" + WIMP_mass]["MVA"]>3)
    # d_test["WIMP_mass_" + WIMP_mass] = d_test["WIMP_mass_" + WIMP_mass].iloc[l]

    # Plots for real data
    plt.figure()
    plt.scatter(d_test["WIMP_mass_" + WIMP_mass]["EC"].values, d_test["WIMP_mass_" + WIMP_mass]["EFID"].values, color="0.6", s=1)
    plt.scatter(d_eval["realdata"]["EC"], d_eval["realdata"]["EFID"], s=20, c=d_eval["realdata"]["MVA"].values, cmap=rvb, vmin=-10, vmax=10)
    cbar = plt.colorbar()
    cbar.set_label("BDT output", labelpad=15, fontsize=18)
    plt.xlabel("Heat (keV)", fontsize=20)
    plt.ylabel("Fiducial Ionisation (keV)", fontsize=20)
    plt.ylim([0, 5])
    plt.xlim([0.5, 5])
    plt.grid(True)
    plt.savefig(fig_dir + bolo_name + "_real_data_WIMP_mass_" + WIMP_mass + ".png")
    plt.close("all")

    # for event_type in list_event_type:
    #     plt.figure()
    #     # plt.scatter(d_test["WIMP_mass_" + WIMP_mass]["EC"].values, d_test["WIMP_mass_" + WIMP_mass]["EFID"].values, color="0.6", s=1)
    #     plt.scatter(d_test[event_type]["EC"], d_test[event_type]["EFID"], s=20, c=d_test[event_type]["MVA"].values, cmap=rvb, vmin=-13, vmax=6)
    #     cbar = plt.colorbar()
    #     cbar.set_label("BDT output", labelpad=15, fontsize=18)
    #     plt.xlabel("Heat (keV)", fontsize=20)
    #     plt.ylabel("Fiducial Ionisation (keV)", fontsize=20)
    #     plt.ylim([0, 5])
    #     plt.xlim([0.5, 5])
    #     plt.grid(True)
    #     plt.savefig(fig_dir + bolo_name + "_" + event_type + "_WIMP_mass_" + WIMP_mass + ".png")
    #     plt.close("all")


def plot_PCA_stuff(d_test, d_eval, d_event_dir, classifier_type, WIMP_mass, bolo_name, analysis_type, exposure, bin_X, min_X, max_X, pca_index):
    """
    Detail:
        Plot PCA results

    Args:
        d_test (dict) = dict with test data
        d_eval (dict) = dict with eval data
        d_event_dir (dict) = dict to get the proper directory of each event class
        classifier_type (str) = type of classifier
        WIMP_mass (str) = WIMP mass
        bolo_name (str) = bolometer name
        analysis_type (str) = type of analysis (which box cut)
        exposure (float) = exposure in days
        bin_X, min_X, max_X (int float float) = TH1F parameters

    Returns:
        void

    Raises:
        void
    """
    # Get scaling dict for data visualisation
    d_scaling = BDT_fh.open_MVA_scaling_file(bolo_name, analysis_type, "")

    # Load PCA
    pickle_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/")
    pca_file = open(pickle_dir + "pca_classifier_mass_" + str(WIMP_mass) + ".pkl", 'rb')
    pca = pickle.load(pca_file)
    pca_file.close()

    # Get classifier
    model_dir = script_utils.create_directory("./Classifier_files/" + bolo_name + "/" + analysis_type + "/")
    modelfile = model_dir + "xgboost_classifier_mass_" + str(WIMP_mass) + ".model"
    bst = xgb.Booster({'nthread': 16}, model_file=modelfile)

    # Get predictions on test sample
    d_hist = {}
    d_color = {"S1Pb": kOrange-8, "S2Pb": kOrange-9, "S1Beta": kGreen+2, "S2Beta": kGreen-3,
               "S1Gamma": kBlue-7, "S2Gamma": kBlue, "FidGamma": kAzure+10, "heatonly": kRed,
               "WIMP_mass_" + WIMP_mass: kGray, "neutron": kMagenta}
    print pca.transform(d_test["FidGamma"].iloc[:, :-2].values).shape
    print pca.transform(d_eval["realdata"].iloc[:, :].values).shape
    for event_type in ["S1Gamma", "S2Gamma", "FidGamma", "S1Beta", "S2Beta", "S1Pb", "S2Pb", "heatonly"]:
        d_hist[event_type] = TH1F("h" + event_type, "h" + event_type, bin_X, min_X, max_X)
        PyRPl.fill_TH1(d_hist[event_type], bst.predict(xgb.DMatrix(pca.transform(d_test[event_type].iloc[:, :-2].values))))
        PyRPl.process_TH1(d_hist[event_type], use_fill_bool=True, color=d_color[event_type])
        d_hist[event_type].Scale(float(d_scaling["prop_" + event_type])*float(d_scaling["exp_per_day"])*exposure/float(d_hist[event_type].Integral()))

    # Get predictions on data
    hdata = TH1F("hdata", "hdata", bin_X, min_X, max_X)
    PyRPl.fill_TH1(hdata, bst.predict(xgb.DMatrix(pca.transform(d_eval["realdata"].iloc[:, :].values))))

    d_hist["S1Pb"].Add(d_hist["S2Pb"])
    d_hist["S1Beta"].Add(d_hist["S2Beta"])
    d_hist["FidGamma"].Add(d_hist["S1Gamma"])
    d_hist["FidGamma"].Add(d_hist["S2Gamma"])
    list_hist = [d_hist["S1Pb"], d_hist["S1Beta"], d_hist["FidGamma"], d_hist["heatonly"]]
    hs = THStack("hs", "hs")
    for hist in list_hist:
        hs.Add(hist)

    cc = TCanvas("cc", "cc")
    h1 = TH1F("h1", "h1", bin_X, min_X, max_X)
    PyRPl.process_TH1(h1, X_title="PCA var", min_Y=1E-1, max_Y=20000)

    gPad.SetLogy()
    h1.Draw()
    hs.Draw("same")
    hdata.Draw("sameE1")

    leg = TLegend(0.14, 0.50, 0.33, 0.87)
    leg.AddEntry(d_hist["S1Pb"].GetName(), "Lead", "f")
    leg.AddEntry(d_hist["S1Beta"].GetName(), "Beta", "f")
    leg.AddEntry(d_hist["FidGamma"].GetName(), "Gamma", "f")
    leg.AddEntry(d_hist["heatonly"].GetName(), "Heat-only", "f")
    leg.SetFillColor(kWhite)
    leg.SetBorderSize(0)
    leg.Draw("same")
...
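The make_colormap helper above is the one piece of this script that does not depend on the ROOT/xgboost analysis stack, and every scatter plot reuses the red-to-green map it builds. The sketch below repeats its logic so it runs on its own with just matplotlib and numpy; the demo data and the output filename are made up for illustration.

# Standalone usage sketch for make_colormap: rebuilds the red-to-green map
# used by the plotting functions and applies it to random points.
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np

def make_colormap(seq):
    """Same logic as above: floats mark anchor positions, tuples give colors."""
    seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3]
    cdict = {"red": [], "green": [], "blue": []}
    for i, item in enumerate(seq):
        if isinstance(item, float):
            r1, g1, b1 = seq[i - 1]
            r2, g2, b2 = seq[i + 1]
            cdict["red"].append([item, r1, r2])
            cdict["green"].append([item, g1, g2])
            cdict["blue"].append([item, b1, b2])
    return mcolors.LinearSegmentedColormap("CustomMap", cdict)

c = mcolors.ColorConverter().to_rgb
# Solid red up to 0.33, red-to-green gradient until 0.86, then solid green.
rvb = make_colormap([c("red"), c("red"), 0.33, c("red"), c("green"), 0.86, c("green")])

x, y = np.random.rand(2, 200)  # toy data standing in for EC/EFID columns
plt.scatter(x, y, c=x, cmap=rvb, vmin=0, vmax=1)
plt.colorbar(label="score")
plt.savefig("colormap_demo.png")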
Source: event_type_maps.py
1import pandas as pd2import re3def assign_events_emdat(db):4 # Floods5 flood = db.dataset_event_type.str.match('^Flood_').fillna(False)6 db.loc[flood, 'event_type'] = 'A6'7 # storms8 storm = db.dataset_event_type.str.match('^Storm_').fillna(False)9 db.loc[storm, 'event_type'] = 'A5'10 # industrial accident & miscellaneous accident11 ind_acc = db.dataset_event_type.str.match(12 '^Industrial accident_').fillna(False)13 misc_acc = db.dataset_event_type.str.match(14 '^Miscellaneous accident_').fillna(False)15 db.loc[(ind_acc | misc_acc), 'event_type'] = 'D3'16 # earthquake17 quake = db.dataset_event_type.str.match('^Earthquake_').fillna(False)18 db.loc[quake, 'event_type'] = 'A2'19 # Famine (drought and complex disasters that include "food shortage" or20 # "famine" in their other disaster types.)21 cd = db.dataset_event_type.str.match('^Complex Disasters_').fillna(False)22 drought = db.dataset_event_type.str.match('^Drought_').fillna(False)23 food = db.dataset_event_type.str.match('.*Food shortage').fillna(False)24 famine = db.dataset_event_type.str.match('.*Famine_').fillna(False)25 db.loc[((cd | drought) & (food | famine)), 'event_type'] = 'C'26 db.loc[drought, 'event_type'] = 'C'27 # Earth movements (landslide, avalanche, etc.)28 slide = db.dataset_event_type.str.match('^Landslide_').fillna(False)29 mm = db.dataset_event_type.str.match('^Mass movement').fillna(False)30 db.loc[(slide | mm), 'event_type'] = 'A4'31 # Extreme Temperature, insect infestation (other)32 hot_cold = db.dataset_event_type.str.match(33 '^Extreme temperature').fillna(False)34 bugs = db.dataset_event_type.str.match('^Insect infestation').fillna(False)35 db.loc[(hot_cold | bugs), 'event_type'] = 'A7'36 # Wildfire37 fire = db.dataset_event_type.str.match('^Wildfire_').fillna(False)38 db.loc[fire, 'event_type'] = 'D2'39 # Volcanic activity40 volcano = db.dataset_event_type.str.match(41 '^Volcanic activity_').fillna(False)42 db.loc[volcano, 'event_type'] = 'A3'43 # transportation - air, rail, water, road44 rail = db.dataset_event_type.str.match(45 '^Transport accident_Rail').fillna(False)46 db.loc[rail, 'event_type'] = 'D5'47 air = db.dataset_event_type.str.match(48 '^Transport accident_Air').fillna(False)49 db.loc[air, 'event_type'] = 'D5'50 boat = db.dataset_event_type.str.match(51 '^Transport accident_Water').fillna(False)52 db.loc[boat, 'event_type'] = 'D5.1'53 road = db.dataset_event_type.str.match(54 '^Transport accident_Road').fillna(False)55 db.loc[road, 'event_type'] = 'D4'56 db.loc[(db.dataset == 'EMDAT') & (db.event_type == ''),57 "event_type"] = 'unassigned'58 assert len(db.loc[(db.dataset == "EMDAT") &59 (db.event_type == '')].index) == 060 return db61def assign_events_ged(db):62 is_ged = db.dataset.str.match('')63 state = db.dataset_event_type.astype(str).str.match('^1$').fillna(False)64 non_state = db.dataset_event_type.astype(65 str).str.match('^2$').fillna(False)66 one_sided = db.dataset_event_type.astype(67 str).str.match('^3$').fillna(False)68 side_a_gov = db.side_a.str.match('Government').fillna(False)69 db.loc[(is_ged & (state | non_state | (one_sided & side_a_gov))),70 'event_type'] = 'B1'71 db.loc[(is_ged & (one_sided & ~side_a_gov)), 'event_type'] = 'B2'72 missing = len(db.loc[is_ged & (db.event_type == '')].index)73 assert missing == 074 return db75def assign_events_prio_bdd(db):76 is_bdd = db.dataset.str.match('').fillna(False)77 # all war78 db.loc[is_bdd, 'event_type'] = 'B1'79 missing = len(db.loc[is_bdd & (db.event_type == '')].index)80 assert missing == 081 return db82def 
assign_events_acled(db):83 is_acled = db.dataset.str.match('')84 violence = db.dataset_event_type.str.match('^Violence').fillna(False)85 gov_brut = db.dataset_event_type.str.match('_1\d$').fillna(86 False) # Legal Intervention, coded as war87 terror = (violence & ~gov_brut)88 # terrorism89 db.loc[(is_acled & terror), 'event_type'] = 'B2'90 # war91 db.loc[(is_acled & ~terror), 'event_type'] = 'B1'92 assert len(db.loc[is_acled & (db.event_type == '')].index) == 093 return db94def assign_events_scad(db):95 is_scad = db.dataset.str.match('')96 terror = db.dataset_event_type.str.match('^9_').fillna(False)97 # terrorism98 db.loc[(is_scad & terror), 'event_type'] = 'B2'99 # war100 db.loc[(is_scad & ~terror), 'event_type'] = 'B1'101 assert len(db.loc[is_scad & (db.event_type == '')].index) == 0102 return db103def assign_events_iiss(db):104 is_iiss = db.dataset.str.match('')105 terror = db.dataset_event_type.str.match('terrorism').fillna(False)106 # terrorism107 db.loc[(is_iiss & terror), 'event_type'] = 'B2'108 # war109 db.loc[(is_iiss & ~terror), 'event_type'] = 'B1'110 assert len(db.loc[is_iiss & (db.event_type == '')].index) == 0111 return db112def assign_events_war_supp_2015(db):113 # conditions / coding logic114 is_war_supp_2015 = db.dataset.str.match('')115 terror = db.dataset_event_type.str.match('Terroris').fillna(False)116 # terrorism117 db.loc[(is_war_supp_2015 & terror), 'event_type'] = 'B2'118 # war119 db.loc[(is_war_supp_2015 & ~terror), 'event_type'] = 'B1'120 assert len(db.loc[is_war_supp_2015 & (db.event_type == '')].index) == 0121 return db122def assign_events_war_supp_2014a(db):123 # conditions / coding logic124 is_war_supp_2014a = db.dataset.str.match('')125 # war126 db.loc[(is_war_supp_2014a), 'event_type'] = 'B1'127 assert len(db.loc[is_war_supp_2014a & (db.event_type == '')].index) == 0128 return db129def assign_events_war_exov(db):130 is_war_exov = db.dataset.str.match('')131 war = db.dataset_event_type.str.match('inj_war').fillna(False)132 poison = db.dataset_event_type.str.match('inj_poisoning').fillna(133 False) 134 db.loc[(is_war_exov & war), 'event_type'] = 'B1'135 # poisoning136 db.loc[(is_war_exov & poison), 'event_type'] = 'D9'137 assert len(db.loc[is_war_exov & (db.event_type == '')].index) == 0138 return db139def assign_events_dod(db):140 # conditions141 is_dod = db.dataset.str.match('')142 war = db.dataset_event_type.str.match('inj_war_war')143 terror = db.dataset_event_type.str.match('inj_war_terrorism')144 # war145 db.loc[is_dod & war, 'event_type'] = 'B1.2'146 # terror147 db.loc[is_dod & terror, 'event_type'] = 'B2.2'148 assert len(db.loc[is_dod & (db.event_type == '')].index) == 0149 return db150def assign_events_terror_scrape(db):151 # conditions152 is_ts = db.dataset.str.match('')153 154 db.loc[is_ts, 'event_type'] = 'B2'155 assert len(db.loc[is_ts & (db.event_type == '')].index) == 0156 return db157def assign_events_disaster_exov(db):158 is_dis_exov = db.dataset.str.match('')159 disaster = db.dataset_event_type.str.match('inj_disaster').fillna(False)160 poison = db.dataset_event_type.str.match('inj_poisoning').fillna(False)161 trans = db.dataset_event_type.str.match('inj_trans_other').fillna(False)162 fire = db.dataset_event_type.str.match('inj_fire').fillna(False)163 # disaster164 db.loc[(is_dis_exov & disaster), 'event_type'] = 'A'165 # poisoning166 db.loc[(is_dis_exov & poison), 'event_type'] = 'D9'167 # trans168 db.loc[(is_dis_exov & trans), 'event_type'] = 'D5'169 # fires170 db.loc[(is_dis_exov & fire), 'event_type'] = 'D2'171 assert 
len(db.loc[is_dis_exov & (db.event_type == '')].index) == 0
    return db

def assign_events_dis_supp_15(db):
    is_dis_supp_15 = db.dataset.str.match('')
    non_emdat_type = db.dataset_event_type.str.match('__').fillna(False)
    # The '(?i)' inline flag already makes these patterns case-insensitive,
    # so the stray positional re.IGNORECASE argument (which pandas would
    # read as the `case` parameter) is dropped.
    flood = db.dataset_event_type.str.match('(?i).*flood').fillna(False)
    quake = db.dataset_event_type.str.match('(?i).*earthquake').fillna(False)
    storm = db.dataset_event_type.str.match(
        '(?i).*(storm|typhoon|severe weather|extreme weather'
        '|hurricane|cyclone|tornado)').fillna(False)
    measles = db.dataset_event_type.str.match('(?i).*measles').fillna(False)
    heat_cold = db.dataset_event_type.str.match(
        '(?i).*(heat|snowfall)').fillna(False)
    slide = db.dataset_event_type.str.match('(?i).*slide').fillna(False)
    fire = db.dataset_event_type.str.match('(?i).*fire').fillna(False)
    drought = db.dataset_event_type.str.match('(?i).*drought').fillna(False)
    volcanos = db.dataset_event_type.str.match('(?i).*eruption').fillna(False)
    # floods
    db.loc[(is_dis_supp_15 & non_emdat_type & flood), 'event_type'] = 'A6'
    # quake
    db.loc[(is_dis_supp_15 & non_emdat_type & quake), 'event_type'] = 'A2'
    # storm
    db.loc[(is_dis_supp_15 & non_emdat_type & storm), 'event_type'] = 'A5'
    # slide
    db.loc[(is_dis_supp_15 & non_emdat_type & slide), 'event_type'] = 'A4'
    # fires
    db.loc[(is_dis_supp_15 & non_emdat_type & fire), 'event_type'] = 'D2'
    # drought
    db.loc[(is_dis_supp_15 & non_emdat_type & drought), 'event_type'] = 'C'
    # volcanos
    db.loc[(is_dis_supp_15 & non_emdat_type & volcanos), 'event_type'] = 'A3'
    # heat/cold waves
    db.loc[(is_dis_supp_15 & non_emdat_type & heat_cold), 'event_type'] = 'D8'
    # measles
    db.loc[(is_dis_supp_15 & non_emdat_type & measles), 'event_type'] = 'E3'
    db.loc[is_dis_supp_15 & (db.event_type == ''), 'event_type'] = 'E4'
    assert len(db.loc[is_dis_supp_15 & (db.event_type == '')].index) == 0
    return db

def assign_events_dis_supp_16(db):
    # conditions / coding logic
    is_dis_supp_16 = db.dataset.str.match('')
    flood = db.dataset_event_type.str.match('(?i).*flood').fillna(False)
    quake = db.dataset_event_type.str.match('(?i).*earthquake').fillna(False)
    storm = db.dataset_event_type.str.match(
        '(?i).*(storm|severe weather|typhoon|hurricane'
        '|cyclone|tornado)').fillna(False)
    heat_cold = db.dataset_event_type.str.match('(?i).*heatwave').fillna(False)
    slide = db.dataset_event_type.str.match('(?i).*slide').fillna(False)
    fire = db.dataset_event_type.str.match('(?i).*fire').fillna(False)
    drought = db.dataset_event_type.str.match('(?i).*drought').fillna(False)
    volcanos = db.dataset_event_type.str.match('(?i).*eruption').fillna(False)
    air = db.dataset_event_type.str.match('(?i).*air').fillna(False)
    train = db.dataset_event_type.str.match('(?i).*train').fillna(False)
    collapse = db.dataset_event_type.str.match('(?i).*collapse').fillna(False)
    explosion = db.dataset_event_type.str.match('(?i).*explosion').fillna(False)
    inj_mech_other = db.dataset_event_type.str.match(
        '(?i).*industrial').fillna(False)
    war = db.dataset_event_type.str.match('(?i).*war').fillna(False)
    terror = db.dataset_event_type.str.match('(?i).*terror').fillna(False)
    LI = db.dataset_event_type.str.match(
        '(?i).*legal intervention').fillna(False)
    # floods
    db.loc[(is_dis_supp_16 & flood), 'event_type'] = 'A6'
    # quake
    db.loc[(is_dis_supp_16 & quake), 'event_type'] = 'A2'
    # storm
    db.loc[(is_dis_supp_16 & storm), 'event_type'] = 'A5'
    # slide
    db.loc[(is_dis_supp_16 & slide), 'event_type'] = 'A4'
    # fires
    db.loc[(is_dis_supp_16 & fire), 'event_type'] = 'D2'
    # drought
    db.loc[(is_dis_supp_16 & drought), 'event_type'] = 'C'
    # volcanos
    db.loc[(is_dis_supp_16 & volcanos), 'event_type'] = 'A3'
    # air
    db.loc[(is_dis_supp_16 & air), 'event_type'] = 'D5'
    # train
    db.loc[(is_dis_supp_16 & train), 'event_type'] = 'D5'
    # collapse, explosion --> inj_mech_other
    db.loc[(is_dis_supp_16 & (collapse | explosion | inj_mech_other)),
           'event_type'] = 'D3'
    # war
    db.loc[(is_dis_supp_16 & (war | LI)), 'event_type'] = 'B1'
    # terror
    db.loc[(is_dis_supp_16 & terror), 'event_type'] = 'B2'
    # heat/cold waves
    db.loc[(is_dis_supp_16 & heat_cold), 'event_type'] = 'D8'
    db.loc[is_dis_supp_16 & (db.event_type == ''), 'event_type'] = 'unassigned'
    assert len(db.loc[is_dis_supp_16 & (db.event_type == '')].index) == 0
    return db

def assign_events_epidemics(db):
    # cholera
    cholera = db.dataset_event_type.str.match('^cholera').fillna(False)
    db.loc[cholera, 'event_type'] = 'E1.1'
    # meningococcal meningitis
    mm = db.dataset_event_type.str.match('^meningitis').fillna(False)
    db.loc[mm, 'event_type'] = 'E2'
    assert len(db.loc[(db.dataset == "Epidemics")
                      & (db.event_type == '')].index) == 0
    return db

def assign_events_ebola_who_pre2014(db):
    is_ebola_who_pre2014 = db.dataset.str.match('')
    db.loc[is_ebola_who_pre2014, 'event_type'] = 'E8'
    return db

def assign_events_ebola_2017update(db):
    is_ebola_2017update = db.dataset.str.match(
        'ebola_wAfrica_2014_update_2017')
    db.loc[is_ebola_2017update, 'event_type'] = 'E8'
    return db

def assign_events_ebola_mort_apport_2016(db):
    is_ebola_mort_aport = db.dataset.str.match('')
    db.loc[is_ebola_mort_aport, 'event_type'] = 'E8'
    return db

def assign_events_ebola_ic(db):
    is_ebola_ic = db.dataset.str.match('')
    db.loc[is_ebola_ic, 'event_type'] = 'E8'
    return db

def assign_events_mina_crowd_collapse(db):
    '''Crowd collapse is inj_mech_other.'''
    is_mina_cc = db.dataset.str.match('')
    db.loc[is_mina_cc, 'event_type'] = 'D3'
    return db

def assign_events_syr_supp(db):
    is_syr_supp = db.dataset.str.match('')
    db.loc[is_syr_supp, 'event_type'] = 'B1'
    return db

def assign_events_mzr(db):
    is_mzr = db.dataset.str.match('')
    db.loc[is_mzr, 'event_type'] = 'B1'
    return db

def assign_events_irq_ims_ibc(db):
    is_irq_avg = db.dataset.str.match('')
    db.loc[is_irq_avg, 'event_type'] = 'B1'
    return db

def assign_events_phl_supp_2015(db):
    # conditions / logic
    is_phl_supp_2015 = db.dataset.str.contains('').fillna(False)
    war = (db.dataset_event_type.str.contains('war', case=False).fillna(False)
           | (db.dataset_event_type.str.contains('conflict', case=False).fillna(False)
              & db.dataset_notes.str.contains('war', case=False).fillna(False)))
    # 'tyhpoon' (sic) is kept alongside 'typhoon'; it apparently matches a
    # misspelling present in the source data
    storm = db.dataset_event_type.str.contains(
        'cyclone|typhoon|tyhpoon|storm|tornado', case=False).fillna(False)
    flood = db.dataset_event_type.str.contains('flood', case=False).fillna(False)
    slide = db.dataset_event_type.str.contains('landslide', case=False).fillna(False)
    quake = db.dataset_event_type.str.contains('earthquake', case=False).fillna(False)
    tsunami = db.dataset_event_type.str.contains('tsunami', case=False).fillna(False)
    drought = db.dataset_event_type.str.contains('drought', case=False).fillna(False)
    volcano = db.dataset_event_type.str.contains('volcano', case=False).fillna(False)
    other_disaster = db.dataset_event_type.str.contains('wave|winter', case=False).fillna(False)
    broad_disaster = (
        (db.dataset_event_type.str.match('^disaster;;$', case=False).fillna(False)
         | db.dataset_event_type.str.match('^;;$', case=False).fillna(False))
        & db.dataset_notes.str.contains('source maggie found', case=False).fillna(False))
    measles = db.dataset_event_type.str.contains('measles', case=False).fillna(False)
    malaria = db.dataset_event_type.str.contains('malaria', case=False).fillna(False)
    smallpox = db.dataset_event_type.str.contains('small pox', case=False).fillna(False)
    other_diarrhea = db.dataset_event_type.str.contains('diarrhoeal', case=False).fillna(False)
    # assignments
    db.loc[is_phl_supp_2015 & war, 'event_type'] = 'B1'
    db.loc[is_phl_supp_2015 & storm, 'event_type'] = 'A5'
    db.loc[is_phl_supp_2015 & flood, 'event_type'] = 'A6'
    db.loc[is_phl_supp_2015 & slide, 'event_type'] = 'A4'
    db.loc[is_phl_supp_2015 & quake, 'event_type'] = 'A2'
    db.loc[is_phl_supp_2015 & tsunami, 'event_type'] = 'A7'
    db.loc[is_phl_supp_2015 & volcano, 'event_type'] = 'A3'
    db.loc[is_phl_supp_2015 & drought, 'event_type'] = 'C'
    db.loc[is_phl_supp_2015 & other_disaster, 'event_type'] = 'A7'
    db.loc[is_phl_supp_2015 & broad_disaster, 'event_type'] = 'A'
    db.loc[is_phl_supp_2015 & measles, 'event_type'] = 'E3'
    db.loc[is_phl_supp_2015 & malaria, 'event_type'] = 'E5'
    db.loc[is_phl_supp_2015 & smallpox, 'event_type'] = 'E7'
    db.loc[is_phl_supp_2015 & other_diarrhea, 'event_type'] = 'E1'
    return db

def assign_events_pse_supp_2015(db):
    # conditions / logic
    is_pse_supp_2015 = db.dataset.str.contains('').fillna(False)
    war = db.dataset_event_type.str.contains('war', case=False).fillna(False)
    # assignments
    db.loc[is_pse_supp_2015 & war, 'event_type'] = 'B1'
    return db

def assign_events_multi_supp_2015(db):
    # conditions / logic
    is_multi_supp_2015 = db.dataset.str.contains('multi_supp_2015').fillna(False)
    war = db.dataset_event_type.str.contains('war|conflict', case=False).fillna(False)
    slide = db.dataset_event_type.str.contains('avalanche', case=False).fillna(False)
    storm = db.dataset_event_type.str.contains('storm|cyclone', case=False).fillna(False)
    flood = db.dataset_event_type.str.contains('flood', case=False).fillna(False)
    # 'poisioning' (sic) spelling kept as it appears in the source data
    poison = db.dataset_event_type.str.contains('poisioning', case=False).fillna(False)
    # assignments
    db.loc[is_multi_supp_2015 & war, 'event_type'] = 'B1'
    db.loc[is_multi_supp_2015 & slide, 'event_type'] = 'A4'
    db.loc[is_multi_supp_2015 & storm, 'event_type'] = 'A5'
    db.loc[is_multi_supp_2015 & poison, 'event_type'] = 'D9'
    db.loc[is_multi_supp_2015 & flood, 'event_type'] = 'A6'
...
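For orientation, here is a minimal sketch of how one of these assignment functions behaves. It assumes the functions above are in scope and that the database carries the dataset, dataset_event_type and event_type string columns used throughout this file; the two toy rows are invented for illustration:

import pandas as pd

# hypothetical toy database with the columns the functions above expect
db = pd.DataFrame({
    'dataset': ['Epidemics', 'Epidemics'],
    'dataset_event_type': ['cholera outbreak', 'meningitis'],
    'event_type': ['', ''],
})
db = assign_events_epidemics(db)
print(db.event_type.tolist())  # ['E1.1', 'E2']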
data_preparation.py
Source:data_preparation.py
from ROOT import *
from root_numpy import root2array  # used by root_to_csv* below
import script_utils as script_utils
import numpy as np
import pandas as pd
import BDT_file_handler as BDT_fh
import pickle

def root_to_csv_KDE():
    """
    Detail:
        Transform a root TTree to a .csv file to store the data.
        The bolometer name and data directory are hard-coded below.
    Args:
        void
    Returns:
        void
    Raises:
        void
    """
    bolo_name = "FID837"
    data_dir = "./ROOT_files/ana_0.5_0_5_for_KDE/"
    arr = root2array(data_dir + bolo_name + "_WIMP_mass_10_tree.root", "t_new0")
    np.savetxt(data_dir + bolo_name + "_WIMP_mass_10.csv", arr, delimiter=",",
               fmt="%.5f", header="EC1,EC2,EIA,EIB,EIC,EID,HR", comments="")

# root_to_csv_KDE()

def root_to_csv(bolo_name, data_dir, analysis_type, event_type, d_event_dir, bool_train):
    """
    Detail:
        Transform a root TTree to .csv files to store the data
    Args:
        bolo_name (str) = bolometer name
        data_dir (str) = data directory (ROOT TTree files)
        analysis_type (str) = type of analysis (which cuts)
        event_type (str) = event class
        d_event_dir (dict) = dict to get the proper directory of each event class
        bool_train (bool) = boolean to pick test/training sample
    Returns:
        void
    Raises:
        void
    """
    if event_type == "realdata":
        data_dir = "/home/irfulx204/mnt/tmain/Desktop/Run308_Analyse_ERA/Fond_ERA_merged/"
        arr = root2array(data_dir + bolo_name + "_" + analysis_type
                         + d_event_dir[event_type] + "_fond.root", "data")
        out_dir = script_utils.create_directory("./Eval_data/" + bolo_name + "/" + analysis_type + "/")
        np.savetxt(out_dir + bolo_name + "_" + analysis_type + d_event_dir[event_type] + "_fond.csv",
                   arr, delimiter=",", fmt="%.5f",
                   header="EC1,EC2,EIA,EIB,EIC,EID,EC,EFID,HR,RUN,SN", comments="")
    else:
        data_dir += "BDT_" + bolo_name + "/" + analysis_type + "/" + d_event_dir[event_type] + "/ROOT_files/"
        arr = root2array(data_dir + bolo_name + "_" + event_type + "_tree.root",
                         "t_new" + str(bool_train))
        if bool_train == 0:
            out_dir = script_utils.create_directory(
                "/home/irfulx204/mnt/tmain/Desktop/BDT_Scikit/Training_data/" + bolo_name + "/" + analysis_type + "/")
        else:
            out_dir = script_utils.create_directory(
                "/home/irfulx204/mnt/tmain/Desktop/BDT_Scikit/Test_data/" + bolo_name + "/" + analysis_type + "/")
        np.savetxt(out_dir + bolo_name + "_" + event_type + ".csv", arr, delimiter=",",
                   fmt="%.5f", header="EC1,EC2,EIA,EIB,EIC,EID,HR", comments="")

def _add_bdt_features(df):
    """
    Derive the engineered feature columns used by the BDT from the raw
    EC1/EC2/EIA/EIB/EIC/EID energies. This block was duplicated verbatim
    for the background and signal loops of get_data_array and is factored
    out here; the formulas are unchanged.
    """
    col_EC = 0.5 * (df["EC1"] + df["EC2"])
    col_EI = 0.5 * (df["EIA"] + df["EIB"] + df["EIC"] + df["EID"])
    col_EIFID = 0.5 * (df["EIB"] + df["EID"])
    col_EIS1 = 0.5 * (df["EIA"] + df["EIB"])
    col_EIS2 = 0.5 * (df["EIC"] + df["EID"])
    col_EI_for_ER = (1.5 * df["EIA"] + 4 * df["EIB"]
                     + 1.5 * df["EIC"] + 4 * df["EID"])
    df["ECprod"] = df["EC1"] * df["EC2"]
    df["EFIDprod"] = np.log(5 + df["EIB"] * df["EID"])
    df["Evetprod"] = df["EIA"] * df["EIC"]
    df["ECdiff"] = (df["EC1"] - df["EC2"]) / (df["EC1"] + df["EC2"])
    df["EC"] = col_EC
    df["EFID"] = col_EIFID
    df["EFIDdiff"] = df["EIB"] - df["EID"]
    df["sum_ion"] = df["EIA"] + df["EIB"] + df["EIC"] + df["EID"]
    temp_df = pd.concat([col_EIS1, col_EIS2, col_EIFID], axis=1,
                        keys=["EIS1", "EIS2", "EFID"])
    df["max_ion"] = temp_df[["EIS1", "EIS2", "EFID"]].max(axis=1)
    df["max_vet"] = df[["EIA", "EIC"]].max(axis=1)
    # estimated recoil energy and derived discriminating variables
    df["ER"] = (1 + 8. / 3) * col_EC - 0.333 * col_EI_for_ER
    df["test"] = col_EIFID - 0.16 * np.power(np.abs(df["ER"]), 1.18)
    df["testEC"] = (df["EC1"] + df["EC2"]) * df["test"]
    df["prod"] = (df["EC1"] * df["EC2"] * df["EIB"] * df["EID"]
                  / (1 + df["EIA"] * df["EIC"]))
    df["to1_3"] = (np.power(df["EC1"] - 1.1, 2) + np.power(df["EC2"] - 1.1, 2)
                   + np.power(df["EIA"], 2) + np.power(df["EIB"] - 1.1, 2)
                   + np.power(df["EIC"], 2) + np.power(df["EID"] - 1.1, 2))
    # ionisation/recoil ratio, capped at 2
    col_Q = np.abs(col_EI / df["ER"])
    df["Q"] = np.minimum(col_Q, 2.0)
    return df

def get_data_array(bolo_name, bool_train, analysis_type, MVA_tag,
                   list_event_type, exposure, list_variables, **kwargs):
    """
    Detail:
        Load .csv data as pandas tables and store them in a dict
    Args:
        bolo_name (str) = bolometer name
        bool_train (bool) = boolean to pick test/training sample
        analysis_type (str) = type of analysis (which cuts)
        MVA_tag (str) = tag to select the MVA scaling files
        list_event_type (list) = list of event classes
        exposure (float) = exposure duration in days
        list_variables (list) = list of variables to retain for BDT
        datasplit (float, kwarg) = fraction of data to be randomly selected
        d_num_events (dict, kwarg) = number of training events per class
    Returns:
        d_array (dict) = a dict with the training or test data for each class
    Raises:
        void
    """
    # Get scaling dict to set the weights
    d_scaling = BDT_fh.open_MVA_scaling_file(bolo_name, analysis_type, MVA_tag)
    d_array = {}
    if not bool_train:
        data_dir = script_utils.create_directory(
            "/home/irfulx204/mnt/tmain/Desktop/BDT_Scikit/Training_data/" + bolo_name + "/" + analysis_type + "/")
    else:
        data_dir = script_utils.create_directory(
            "/home/irfulx204/mnt/tmain/Desktop/BDT_Scikit/Test_data/" + bolo_name + "/" + analysis_type + "/")
    list_bckg_type = [elem for elem in list_event_type if "WIMP" not in elem]
    list_signal_type = [elem for elem in list_event_type if "WIMP" in elem]
    for event_type in list_bckg_type:
        d_array[event_type] = pd.read_csv(
            data_dir + bolo_name + "_" + event_type + ".csv",
            usecols=["EC1", "EC2", "EIA", "EIB", "EIC", "EID", "HR"])
        d_array[event_type] = _add_bdt_features(d_array[event_type])
        d_array[event_type] = d_array[event_type][list_variables].copy()
        if "datasplit" in kwargs:  # dict.has_key() is Python 2 only
            split = kwargs["datasplit"]
            len_data = d_array[event_type].shape[0]
            d_array[event_type] = d_array[event_type][:int(split * len_data)]
        if "d_num_events" in kwargs:
            d_array[event_type] = d_array[event_type][:kwargs["d_num_events"][event_type]]
        d_array[event_type]["EventID"] = event_type
        d_array[event_type]["weight"] = (
            float(d_scaling["prop_" + event_type]) * float(d_scaling["exp_per_day"])
            * exposure / len(d_array[event_type])
            * np.ones(len(d_array[event_type])))
        d_array[event_type]["tag"] = np.zeros(len(d_array[event_type]))
    # Get sum of bckg weights
    sum_bckg_weights = sum(
        float(d_scaling["prop_" + event_type]) * float(d_scaling["exp_per_day"]) * exposure
        for event_type in list_bckg_type)
    for event_type in list_signal_type:
        d_array[event_type] = pd.read_csv(
            data_dir + bolo_name + "_" + event_type + ".csv",
            usecols=["EC1", "EC2", "EIA", "EIB", "EIC", "EID", "HR"])
        d_array[event_type] = _add_bdt_features(d_array[event_type])
        d_array[event_type] = d_array[event_type][list_variables].copy()
        if "datasplit" in kwargs:
            split = kwargs["datasplit"]
            len_data = d_array[event_type].shape[0]
            d_array[event_type] = d_array[event_type][:int(split * len_data)]
        if "d_num_events" in kwargs:
            d_array[event_type] = d_array[event_type][:kwargs["d_num_events"][event_type]]
        d_array[event_type]["EventID"] = event_type
        # signal carries the summed background weight so that signal and
        # background have equal total weight
        d_array[event_type]["weight"] = (sum_bckg_weights / len(d_array[event_type])
                                         * np.ones(len(d_array[event_type])))
        d_array[event_type]["tag"] = np.ones(len(d_array[event_type]))
    return d_array

def get_eval_array(bolo_name, analysis_type, realdata_tag, list_variables):
    """
    Detail:
        Load .csv data as pandas tables and store them in a dict
    Args:
        bolo_name (str) = bolometer name
        analysis_type (str) = type of analysis (which cuts)
        realdata_tag (str) = tag to identify heat cut files
        list_variables (list) = list of variables to retain for BDT
    Returns:
        d_array (dict) = a dict with the eval data
    Raises:
        void
    """
    d_array = {}
    data_dir = script_utils.create_directory(
        "/home/irfulx204/mnt/tmain/Desktop/BDT_Scikit/Eval_data/" + bolo_name + "/" + analysis_type + "/")
    d_array["realdata"] = pd.read_csv(
        data_dir + bolo_name + "_" + analysis_type + realdata_tag + "_fond.csv",
        usecols=["EC1", "EC2", "EIA", "EIB", "EIC", "EID", "EC", "EFID", "HR"])
    # NB: EC and EFID come straight from the .csv here, so the shared
    # feature helper is not reused; only the subset of variables needed
    # for evaluation is derived.
    rd = d_array["realdata"]
    col_EC = 0.5 * (rd["EC1"] + rd["EC2"])
    col_EI_for_ER = (1.5 * rd["EIA"] + 4 * rd["EIB"]
                     + 1.5 * rd["EIC"] + 4 * rd["EID"])
    col_EIFID = 0.5 * (rd["EIB"] + rd["EID"])
    col_EIS1 = 0.5 * (rd["EIA"] + rd["EIB"])
    col_EIS2 = 0.5 * (rd["EIC"] + rd["EID"])
    temp_df = pd.concat([col_EIS1, col_EIS2, col_EIFID], axis=1,
                        keys=["EIS1", "EIS2", "EFID"])
    rd["max_ion"] = temp_df[["EIS1", "EIS2", "EFID"]].max(axis=1)
    rd["EFIDdiff"] = rd["EIB"] - rd["EID"]
    rd["to1_3"] = (np.power(rd["EC1"] - 1.1, 2) + np.power(rd["EC2"] - 1.1, 2)
                   + np.power(rd["EIA"], 2) + np.power(rd["EIB"] - 1.1, 2)
                   + np.power(rd["EIC"], 2) + np.power(rd["EID"] - 1.1, 2))
    rd["ER"] = (1 + 8. / 3) * col_EC - 0.333 * col_EI_for_ER
    rd["test"] = col_EIFID - 0.16 * np.power(np.abs(rd["ER"]), 1.18)
    rd["testEC"] = (rd["EC1"] + rd["EC2"]) * rd["test"]
    rd["prod"] = (rd["EC1"] * rd["EC2"] * rd["EIB"] * rd["EID"]
                  / (1 + rd["EIA"] * rd["EIC"]))
    d_array["realdata"] = rd[list_variables]
    return d_array

def prepare_data_for_scikit(dict_df):
    """
    Detail:
        Convert the pandas data frames in dict_df into numpy arrays
        and shuffle them
    Args:
        dict_df (dict) = dict with training data as pandas dataframes
    Returns:
        x, weight, y (the feature / weight / class arrays)
    Raises:
        void
    """
    # Merge data frames of the different event types
    df = pd.concat([dict_df[event_type] for event_type in dict_df.keys()],
                   ignore_index=True)
    # Shuffle the rows (.ix is deprecated; .loc works on the integer
    # index produced by ignore_index=True)
    df = df.loc[np.random.permutation(df.index)]
    # Prepare training and test samples: the last three columns are
    # EventID, weight and tag
    x = df.iloc[:, :-3].values
    weight = df.iloc[:, -2].values
    y = df.iloc[:, -1].values
...
LambdaTest's Playwright tutorial gives a broader overview of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. The tutorial offers end-to-end guidance, from installing the Playwright framework to best practices and advanced concepts.